6.4 Model Extrapolation
1. Theoretical basis of model extrapolation
To produce valid predictions, model projections can be divided into two forms: interpolation and extrapolation.
## Interpolation: interpolation is generally carried out within the calibration region using the same environmental variables, so the problem of the model failing under non-analogue climatic conditions essentially does not arise. Interpolation does, however, call for more non-climatic variables to guarantee data precision; and because the model combines the species' actual distribution with the projection and builds a set of univariate constraints on the suitability function, it can already deliver effective interpolated predictions. Moreover, if more topographic or biotic variables can be incorporated during interpolation, more refined predictions may be achievable.
## Extrapolation: extrapolation is considerably more complicated. To begin with, even in the native range we may be unable to reconstruct a complete and valid realized niche, for several reasons: climatic or non-climatic niche truncation; inaccurate and incomplete sampling of the actual distribution; model parameter optimization; and selected environmental dimensions that are algorithmic fits rather than reflections of the species' intrinsic ecological requirements. Other important issues concern the fundamental modelling assumptions: population equilibrium, niche conservatism, and unlimited dispersal ability. In essence, several of the reasons above ultimately stem from these three critical assumptions.
## This problem brings to mind MOP and MESS analyses.
MOP analysis assesses overall environmental similarity with respect to the environments associated with the species' actual distribution, whereas MESS analysis flags extreme non-analogue environmental variables across the larger modelling region. In essence, both are advance similarity checks against the climates actually used in modelling, carried out to ensure that the projection is valid. However, a species' actual distribution often reflects specific ecophysiological limits, and those limits might be delineated well on maps of observed climate, helping the species to be modelled effectively. This could be a new and interesting research topic.
## Widely used methods for detecting model extrapolation:
1. MESS: multivariate environmental similarity surface analysis
2. MOP: mobility-oriented parity analysis
3. ExDet: extrapolation detection, which flags both univariate range novelty and novel multivariate combinations
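As a quick intuition for what these similarity checks do, here is a minimal toy sketch using dismo::mess (all values below are made up): cells with negative MESS scores lie outside the range of the calibration data, i.e. they would require extrapolation.
```{r eval=FALSE}
library(dismo)
library(raster)
set.seed(1)
# two hypothetical environmental layers
r1 <- raster(nrows = 10, ncols = 10, vals = runif(100, 0, 20))   # "temperature"
r2 <- raster(nrows = 10, ncols = 10, vals = runif(100, 0, 100))  # "precipitation"
s <- stack(r1, r2); names(s) <- c("temp", "prec")
# hypothetical calibration data covering only part of each variable's range
ref <- data.frame(temp = runif(30, 5, 15), prec = runif(30, 20, 80))
ms <- dismo::mess(s, ref)  # similarity of every cell to the calibration data
plot(ms < 0)               # TRUE = non-analogue conditions (extrapolation)
```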
2. Spatial transfer
2.1 MESS
library(raster)
library(tidyverse)
## Build the climate matrix for the South American occurrences
## (envs and the xh_* occurrence tables were created in earlier sections):
sa_enm <- raster::extract(envs, xh_sa[, 2:3])
## Build the background extent for each region:
pp <- function(x){
  occs <- x
  xmin <- min(occs$longitude)
  xmax <- max(occs$longitude)
  ymin <- min(occs$latitude)
  ymax <- max(occs$latitude)
  # five corner points (first corner repeated to close the polygon), buffered by 5 degrees
  bb <- matrix(c(xmin-5, xmin-5, xmax+5, xmax+5, xmin-5,
                 ymin-5, ymax+5, ymax+5, ymin-5, ymin-5), ncol=2)
  bgExt <- sp::SpatialPolygons(list(sp::Polygons(list(sp::Polygon(bb)), 1)))
  return(bgExt)
}
## Crop and mask the environmental stack to each regional background polygon
po_sa <- pp(xh_sa)
envs_sa <- mask(crop(envs$BIO4, po_sa), po_sa) %>% mask(crop(envs, .), .)
po_as <- pp(xh_as)
envs_as <- mask(crop(envs$BIO4, po_as), po_as) %>% mask(crop(envs, .), .)
po_au <- pp(xh_au)
envs_au <- mask(crop(envs$BIO4, po_au), po_au) %>% mask(crop(envs, .), .)
po_na <- pp(xh_na)
envs_na <- mask(crop(envs$BIO4, po_na), po_na) %>% mask(crop(envs, .), .)
## Method 1: MESS
me_au <- dismo::mess(envs_au, sa_enm, full=TRUE)  # one similarity layer per variable plus the overall MESS
envs_names <- c(names(envs), "rmess")
names(me_au) <- envs_names
me_au2 <- me_au > 0  # TRUE = within the calibration range (analogue conditions)
par(mfrow=c(3,4))
plot(me_au2, axes=F)
plot(me_au, axes=F)
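In the MESS output, negative values flag non-analogue environments, so the thresholded stack `me_au2` separates cells where projecting is interpolation (TRUE) from cells that require extrapolation (FALSE); the `rmess` layer holds the minimum similarity across all variables and is usually the map to report.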
2.2 MOP
devtools::install_github("marlonecobos/kuenm")
library(kuenm)
help(kuenm_mmop)
data(Set3)
sets_var <- "Set3" # a vector of various sets can be used
out_mop <- "MOP_results"
percent <- 10
paral <- FALSE # make this true to perform MOP calculations in parallel, recommended
# only if a powerfull computer is used (see function's help)
# Two of the variables used here as arguments were already created for previous functions
kuenm_mmop(G.var.dir = G_var_dir, M.var.dir = M_var_dir, sets.var = sets_var, out.mop = out_mop,
percent = percent, parallel = paral)
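kuenm_mmop does not return an object; it writes the MOP rasters to the out.mop directory, one per variable set and projection scenario. Values close to 1 indicate environments similar to those of the calibration (M) region, while cells with a value of zero mark strict extrapolation, where at least one variable falls outside the calibration range.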
## Extrapolation risk analysis:
## R package: enmSdm
library(enmSdm)
# set1, set2 and p are not defined in this chapter; see ?mop in enmSdm for the argument details
mop(set1, set2, p, index = FALSE, cores = 1, na.rm = FALSE)
2.3 ExDet
## EXDET (implemented in the dsmextra package, which provides the functions used below)
library(devtools)
devtools::install_github("densitymodelling/dsmextra")
library(dsmextra)
data("spermwhales")
segs <- spermwhales$segs
predgrid <- spermwhales$predgrid
#'---------------------------------------------
# Quick inspection
#'---------------------------------------------
knitr::kable(head(segs[,c(1, 3:12)]), format = "pandoc")
knitr::kable(head(predgrid), format = "pandoc")
study_area <- predgrid[, c("x", "y")]
study_area$value <- 1
study_area <- raster::rasterFromXYZ(study_area, crs = sp::CRS("+proj=aea +lat_1=38 +lat_2=30 +lat_0=34 +lon_0=-73 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"))
study_area <- raster::rasterToPolygons(study_area, dissolve = TRUE)
study_area <- sp::spTransform(study_area, CRSobj = sp::proj4string(transects))
study_area <- smoothr::smooth(study_area, method = "ksmooth", smoothness = 5)
#'---------------------------------------------
# Produce a simple plot
#'---------------------------------------------
plot(study_area, col = "lightskyblue1") # Region boundary
plot(transects, add = TRUE, col = "skyblue3") # Survey tracks
maps::map("world", fill = TRUE, col = "grey",
xlim = range(obs$coords.x1),
ylim = range(obs$coords.x2), add = TRUE)
pts <- obs # Sightings
coordinates(pts) <- ~ coords.x1 + coords.x2
points(pts, pch = 16)
axis(1); axis(2); box()
#'---------------------------------------------
# Define projected coordinate system
#'---------------------------------------------
aftt_crs <- sp::CRS("+proj=aea +lat_1=38 +lat_2=30 +lat_0=34 +lon_0=-73 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
#'---------------------------------------------
# Define environmental covariates of interest
#'---------------------------------------------
covariates.spermwhale <- c("Depth", "SST", "NPP", "DistToCAS", "EKE")
## Quantifying extrapolation
spermwhale.extrapolation <- compute_extrapolation(samples = segs,
covariate.names = covariates.spermwhale,
prediction.grid = predgrid,
coordinate.system = aftt_crs)
# Count the prediction cells that fall outside the univariate range of the samples
predgrid %>%
dplyr::filter(!dplyr::between(Depth, min(segs$Depth), max(segs$Depth)) |
!dplyr::between(SST, min(segs$SST), max(segs$SST)) |
!dplyr::between(NPP, min(segs$NPP), max(segs$NPP)) |
!dplyr::between(DistToCAS, min(segs$DistToCAS), max(segs$DistToCAS)) |
!dplyr::between(EKE, min(segs$EKE), max(segs$EKE))) %>%
nrow()
str(spermwhale.extrapolation, 2)
# Example from combinatorial extrapolation
head(spermwhale.extrapolation$data$combinatorial)
## Summarising extrapolation
summary(spermwhale.extrapolation)
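For reference, compute_extrapolation follows the ExDet convention of Mesgaran et al. (2014): values below 0 indicate univariate extrapolation (conditions outside the observed range of at least one covariate), values between 0 and 1 indicate analogue conditions, and values above 1 indicate combinatorial extrapolation (novel combinations of individually familiar values).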
## Comparing covariates
compare_covariates(extrapolation.type = "both",
covariate.names = covariates.spermwhale,
n.covariates = NULL,
samples = segs,
prediction.grid = predgrid,
coordinate.system = aftt_crs,
create.plots = TRUE,
display.percent = TRUE)
## Find the data nearby (Gower's distance)
spermwhale.nearby <- compute_nearby(samples = segs,
prediction.grid = predgrid,
coordinate.system = aftt_crs,
covariate.names = covariates.spermwhale,
nearby = 1)
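Here compute_nearby quantifies how well each prediction cell is supported by the sample: with nearby = 1 it reports the percentage of reference data (%N) lying within a radius of one (mean) Gower's distance of the cell, so low values flag predictions made far from the sampled environment.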
## Visualise the extrapolation
obs.sp <- obs %>%
dplyr::rename(., x = coords.x1, y = coords.x2) %>%
sp::SpatialPointsDataFrame(coords = cbind(.$x, .$y), data = ., proj4string = sp::CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")) %>%
sp::spTransform(., CRSobj = aftt_crs)
map_extrapolation(map.type = "extrapolation",
extrapolation.values = spermwhale.extrapolation,
covariate.names = covariates.spermwhale,
prediction.grid = predgrid,
coordinate.system = aftt_crs,
sightings = obs.sp,
tracks = transects)
map_extrapolation(map.type = "nearby",
gower.values = spermwhale.nearby,
covariate.names = covariates.spermwhale,
prediction.grid = predgrid,
coordinate.system = aftt_crs,
sightings = obs.sp,
tracks = transects)
## One-stop analysis:
## Note:
# compare.covariates = if TRUE, run compare_covariates
# nearby.compute = if TRUE, run compute_nearby
# map.generate = if TRUE, run map_extrapolation
spermwhale.analysis <- extrapolation_analysis(samples = segs,
covariate.names = covariates.spermwhale,
prediction.grid = predgrid,
coordinate.system = aftt_crs,
compare.covariates = TRUE,
compare.extrapolation.type = "both",
compare.n.covariates = NULL,
compare.create.plots = TRUE,
compare.display.percent = TRUE,
nearby.compute = TRUE,
nearby.nearby = 1,
map.generate = TRUE,
map.sightings = obs.sp,
map.tracks = transects)
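extrapolation_analysis is a wrapper that chains the previous steps (compute_extrapolation, compare_covariates, compute_nearby and map_extrapolation) in a single call; the compare.*, nearby.* and map.* arguments are passed through to the corresponding functions.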
6.4.1 Projecting into the future
```{r eval=FALSE}
# rattler.me is the model fitted on present-day data (current predicted suitability)
rattler.2070 <- predict(rattler.me, modelFutureEnv)
# Plot the projected future suitability:
plot(rattler.2070, main="Predicted Future Suitability")
map('worldHires', fill=FALSE, add=TRUE)  # add=TRUE overlays the coastline
points(rattler$lon, rattler$lat, pch="+", cex=0.2)
```
#### 6.4.2 Comparing extrapolations (future vs. present)
##### 6.4.2.1 Visual comparison of extrapolations
```{r eval=FALSE}
## Compare the present and future projections of the transferred model:
rattler.change <- rattler.2070 - rattler.pred  # note that the two layers are joined by a minus sign
plot(rattler.change, main="Predicted Change in Suitability")
map('worldHires', fill=FALSE, add=TRUE)
points(rattler$lon, rattler$lat, pch="+", cex=0.2)
## Use a histogram to display the present-vs-future contrast at the occurrence points
rattlerChangePoints <- extract(rattler.change, rattlerocc)
hist(rattlerChangePoints, main="")
## abline draws a vertical reference line at zero
abline(v=0, col="red")
```
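In the histogram, bars to the left of the red zero line are occurrence points predicted to lose suitability by 2070, and bars to the right are points predicted to gain it.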
6.4.2.2 Comparing extrapolation across time or range series
## This part is not yet finished.
## For this work, refer to R code for time-series models:
## https://www.gis-blog.com/eva-intro-4/
## From the earlier palaeoclimate modelling study: palaeoclimate research already covers a fair number of models and simulated periods. Combined with present and future environmental data, the analysis could therefore be run along a timeline, revealing at least how different species (or the same species) shift their ranges as time advances or as the general circulation model changes.
## The work above only requires extracting a simple longitude/latitude boundary or a range-area value per period, as sketched below:
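## A minimal sketch (all object names are hypothetical) of how ams_df could be
## assembled: one binary suitability raster (1 = suitable) per simulated period,
## whose suitable areas are summed into one row per period:
library(raster)
pred_list <- list("-21000" = r_lgm, "-6000" = r_mh, "0" = r_now, "2070" = r_2070)
ams_df <- data.frame(
  date = as.numeric(names(pred_list)),
  ams  = sapply(pred_list, function(r) {
    a <- raster::area(r)          # cell areas in km^2 on longitude/latitude grids
    sum(a[r == 1], na.rm = TRUE)  # total suitable area in that period
  })
)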
## Plot with ggplot2:
p <- ggplot(ams_df, aes(x = date, y = ams)) + geom_point() + geom_line()
p + stat_smooth(method = "lm", formula = y ~ x, size = 1)
6.4.3 Classic case studies
Null model:
"A null model is a pattern-generating model that is based on randomization of ecological data or random sampling from a known or specified distribution. The null model is designed with respect to some ecological or evolutionary process of interest. Certain elements of the data are held constant, and others are allowed to vary stochastically to create new assemblage patterns. The randomization is designed to produce a pattern that would be expected in the absence of a particular ecological mechanism".
## Put simply, the essence of a null model is to take the actual pattern and, by repeated random sampling, assess the difference between the randomly generated pattern and the actual one with respect to the ecological process of interest, and then test the significance of that difference.
Paper:
-- Null versus neutral models: what's the difference?
N. J. Gotelli (ngotelli@uvm.edu), Dept of Biology, Univ. of Vermont, Burlington, VT 05405, USA; Brian J. McGill, Dept of Biology, McGill Univ., Stewart Biology Bldg, 1205 Dr. Penfield Ave., Montreal, QC H3A 1B1, Canada
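To make the randomization logic concrete, here is a minimal toy sketch (all values are hypothetical and not tied to any dataset in this chapter): we ask whether occupied cells are warmer than expected by chance by comparing the observed mean temperature against a null distribution built from repeated random draws.
```{r eval=FALSE}
set.seed(1)
temp <- runif(10000, 0, 30)             # hypothetical temperatures of all cells
occ  <- sample(which(temp > 18), 50)    # hypothetical occupied cells
obs_stat  <- mean(temp[occ])            # observed statistic
null_stat <- replicate(999, mean(temp[sample(10000, 50)]))  # null distribution
hist(null_stat, main = ""); abline(v = obs_stat, col = "red")
(sum(null_stat >= obs_stat) + 1) / (999 + 1)  # one-tailed p-value
```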
Supplement:
ExDet with dsmextra
## https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.13469
#' ------------------------------------------------------------------------
#' ------------------------------------------------------------------------
#'
#' dsmextra: Extrapolation assessment tools for density surface models
#'
#' Bouchet et al. (2020). Methods in Ecology and Evolution
#'
#' --- 02: EXAMPLE ANALYSIS ---
#'
#' ------------------------------------------------------------------------
#' ------------------------------------------------------------------------
# Required libraries ------------------------------------------------------
# Load and install required packages
# Note: This requires the pacman package, which can be installed with
# the following command:
install.packages("pacman")
# The latest version of the dsmextra package can be installed from Github using:
remotes::install_github("densitymodelling/dsmextra")
pacman::p_load(dsm, # Density surface modelling of distance sampling data
dsmextra, # Extrapolation toolkit for ecological models
Distance, # Distance sampling detection function and abundance estimation
raster, # Geographic data analysis and modelling
tidyverse, # Tools and functions for data science
sf, # Simple features in R
magrittr, # Code semantics
GGally, # Extension to ggplot2
viridisLite, # Default Color Maps from 'matplotlib'
smoothr, # Smooth and tidy spatial features
sp) # Classes and methods for spatial data
set.seed(42) # Set the random seed
# Set general options
options(tibble.width = Inf) # All tibble columns shown
options(pillar.neg = FALSE) # No colouring of negative numbers
options(pillar.subtle = TRUE)
options(pillar.sigfig = 4)
# ggplot2 options
gg.opts <- theme(panel.grid.minor = element_blank(),
panel.background = element_blank(),
plot.title = element_text(size = 13, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 11),
axis.text = element_text(size = 11),
panel.grid.major = element_line(colour = 'lightgrey', size = 0.1),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "bottom")
# Geographic coordinate systems
latlon_proj <- sp::CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
aftt_proj <- sp::CRS("+proj=aea +lat_1=40.66666666666666 +lat_2=27.33333333333333 +lat_0=34 +lon_0=-78 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0")
GulfMexico_proj <- sp::CRS("+proj=lcc +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")
# Load and prepare the data ------------------------------------------------------
source("01_Bouchet_etal_Functions.R") # Load all functions
# Distance sampling data from package dsm
data("mexdolphins")
head(distdata) # Distance sampling data that will be used to fit the detection function.
head(obsdata) # Links the distance data to the segments.
# Segment data, i.e. the transects have already been “chopped” into segments.
survey.segdata <- segdata %>% dplyr::select(-depth)
head(survey.segdata)
# Create a spatial object for survey tracklines
survey.tracks <- survey.segdata %>%
split(.$Transect.Label) %>%
purrr::map(.x = ., .f = ~dplyr::select(.x, x, y) %>%
as.matrix(.) %>%
raster::spLines(., crs = GulfMexico_proj)) %>%
do.call("rbind", .) %>%
sp::spTransform(., CRSobj = latlon_proj)
# Shapefiles of the study area and the surrounding landmass
survey.area <- raster::shapefile("study_area.shp")
america <- raster::shapefile("america.shp")
# Prediction area (Gulf of Mexico)
prediction.area <- raster::shapefile("gulf_mexico.shp")
prediction.area <- smoothr::smooth(prediction.area, method = "ksmooth") # Make edges smoother
prediction.area_proj <- sp::spTransform(prediction.area, CRSobj = aftt_proj) # Project
# Environmental covariates ------------------------------------------------
# Load the covariate rasters.
load("covariates.rda")
# Covariates include:
# (1) Bathymetric depth [static]
# (2) Seabed slope [static]
# (3) Distance to the coast [static]
# (4) Distance to the nearest submarine canyon or seamount [static]
# (5) Current speed [dynamic]
# (6) Sea surface temperature [dynamic]
# Note that for simplicity, dynamic covariates were summarised as a
# yearly median across 12 monthly rasters.
# Note also that the raster stack in covariates.rda also contains layer representing the surface
# area of each grid cell. This will be useful when making predictions from density surface models,
# which include an offset term for the area effectively surveyed.
# Prediction grid and segments --------------------------------------------
# Create the final prediction data.frame
pred.grid <- raster::stack(env.rasters) %>%
raster::projectRaster(from = ., crs = GulfMexico_proj) %>%
raster::as.data.frame(., xy = TRUE) %>%
na.omit()
# Retrieve covariate values for each segment
survey.segdata <- raster::projectRaster(from = raster::stack(env.rasters), crs = GulfMexico_proj) %>%
raster::extract(x = ., y = sp::SpatialPointsDataFrame(coords = survey.segdata[, c("x", "y")],
data = survey.segdata, proj4string = GulfMexico_proj), sp = TRUE) %>%
raster::as.data.frame(.) %>%
dplyr::select(-c(x.1, y.1))
# Collinearity ------------------------------------------------------------
# Check collinearity between covariates
survey.segdata[, names(env.rasters)[1:6]] %>%
GGally::ggcorr(., geom = "blank", label = TRUE, hjust = 0.75, method = c('complete.obs', 'pearson')) +
geom_point(size = 10, aes(color = coefficient > 0, alpha = abs(coefficient) > 0.7)) +
scale_alpha_manual(values = c("TRUE" = 0.25, "FALSE" = 0)) +
guides(color = FALSE, alpha = FALSE)
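# The points above highlight covariate pairs with |r| > 0.7, a common
# rule-of-thumb threshold for problematic collinearity.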
# Extrapolation assessment ------------------------------------------------
# Define environmental covariates of interest
stenella.covariates <- names(env.rasters)[1:6]
# Univariate + combinatorial extrapolation (ExDet)
stenella.exdet <- dsmextra::compute_extrapolation(samples = survey.segdata,
covariate.names = stenella.covariates,
prediction.grid = pred.grid,
coordinate.system = GulfMexico_proj)
summary(stenella.exdet)
# Percentage of data nearby (%N)
stenella.nearby <- dsmextra::compute_nearby(samples = survey.segdata,
covariate.names = stenella.covariates,
prediction.grid = pred.grid,
nearby = 1,
coordinate.system = GulfMexico_proj)
# Extrapolation maps ------------------------------------------------
# (1) Univariate + combinatorial extrapolation (ExDet)
dsmextra::map_extrapolation(map.type = "extrapolation",
base.layer = "ocean",
extrapolation.values = stenella.exdet,
covariate.names = stenella.covariates,
prediction.grid = pred.grid,
coordinate.system = GulfMexico_proj)
# (2) Most influential covariates (MIC)
dsmextra::map_extrapolation(map.type = "mic",
base.layer = "gray",
extrapolation.values = stenella.exdet,
covariate.names = stenella.covariates,
prediction.grid = pred.grid,
coordinate.system = GulfMexico_proj)
# (3) Percentage of data nearby (%N)
dsmextra::map_extrapolation(map.type = "nearby",
base.layer = "ocean",
gower.values = stenella.nearby,
covariate.names = stenella.covariates,
prediction.grid = pred.grid,
coordinate.system = GulfMexico_proj)
# Compare the extent of univariate and combinatorial extrapolation
# (ExDet metric) associated with different combinations of covariates
dsmextra::compare_covariates(extrapolation.type = "both",
samples = survey.segdata,
n.covariates = NULL, # All possible combinations
covariate.names = stenella.covariates,
prediction.grid = pred.grid,
coordinate.system = GulfMexico_proj,
create.plots = TRUE,
display.percent = TRUE)
# Detection function model ------------------------------------------------------
# Fit a simple hazard-rate detection model to the distance data (no covariates)
detfc.hr.null <- Distance::ds(data = distdata, truncation = max(distdata$distance),
key = "hr", adjustment = NULL)
# Plot the fitted model on top of a histogram of distances
plot(detfc.hr.null, showpoints = FALSE, lwd = 2, pl.col = "lightblue")
# Spatial GAM -------------------------------------------------------------
# Create model formulae with and without chosen covariates
dsm.formulas <- make_formulas(remove.covariates = c("depth", "sst"))
# Fit the two competing density surface models: 0 (all covariates), 1 (filtered set)
dsm.0 <- dsm::dsm(formula = dsm.formulas$f0,
ddf.obj = detfc.hr.null,
segment.data = survey.segdata,
observation.data = obsdata,
method = "REML",
family = tw())
dsm.1 <- dsm::dsm(formula = dsm.formulas$f1,
ddf.obj = detfc.hr.null,
segment.data = survey.segdata,
observation.data = obsdata,
method = "REML",
family = tw())
AIC(dsm.0, dsm.1) # AIC scores
# Residual checks
model_checks(dsm.0)
model_checks(dsm.1)
# Model predictions
dsm.0.pred <- predict(object = dsm.0, pred.grid, pred.grid$area)
dsm.1.pred <- predict(object = dsm.1, pred.grid, pred.grid$area)
# Abundance estimates
sum(dsm.0.pred); sum(dsm.1.pred)
# Variance estimation - there are no covariates in the detection function
# so we use dsm.var.gam rather than dsm.var.prop
# The below code may take a few minutes.
varsplit <- split(pred.grid, 1:nrow(pred.grid))
dsm.0.var <- dsm::dsm.var.gam(dsm.obj = dsm.0, pred.data = varsplit, off.set = pred.grid$area)
dsm.1.var <- dsm::dsm.var.gam(dsm.obj = dsm.1, pred.data = varsplit, off.set = pred.grid$area)
summary(dsm.0.var)
summary(dsm.1.var)
# Mapping -----------------------------------------------------------------
# Plot predicted density surfaces
plot_predictions(dsm.predictions = dsm.0.pred)
plot_predictions(dsm.predictions = dsm.1.pred)
# Plot uncertainty surfaces
plot_uncertainty(varprop.output = dsm.0.var, cutpoints = c(0,1,1.5,2,3,4))
plot_uncertainty(varprop.output = dsm.1.var, cutpoints = c(0,1,1.5,2,3,4))